Today we will make a choropleth of the countries on a world map, like the one in the article Coronavirus Map: Tracking the Global Outbreak, which looks like this - world choropleth

For this we will use the JHU CSSE Dataset

#hide_output
import pandas as pd
import geopandas as gpd
import altair as alt
import numpy as np
# Pass actions=False to the embed options of Altair's active renderer.
# NOTE(review): presumably this hides the Vega-Embed action menu on rendered charts - confirm against the Altair docs.
alt.renderers.set_embed_options(actions=False)

I made the following geojson file from the US State Department Global LSIB Polygons Detailed dataset after simplifying it, as the original has too much detail and is very large. The following code does that.

Warning: Do NOT run the following cell. Use the geojson file I have provided instead - skip to the cell after it.

#collapse
# One-off preprocessing: read the detailed LSIB shapefile, drop the attribute
# columns that are not needed, simplify the geometries to shrink the file,
# and write the result out as world.geojson.
# NOTE(review): the input is an absolute path on the author's machine.
us_st_world = gpd.read_file(
    '/home/walker/my_git_repos/fastpages-covidviz/_notebooks/shapes/Global_LSIB_Polygons_Detailed/Global_LSIB_Polygons_Detailed.dbf'
)
us_st_world.drop(
    columns=['OBJECTID', 'Shape_Leng', 'Shape_Le_1', 'Shape_Area'],
    inplace=True,
)
simplified_shapes = us_st_world.geometry.simplify(tolerance=0.05)
us_st_world["geometry"] = simplified_shapes
us_st_world.to_file("world.geojson", driver='GeoJSON')
#alt.Chart(us_st_world_).mark_geoshape(strokeWidth=1, stroke='white').encode().properties(width=1000, height=500).project('equalEarth')

# Simplified world boundaries, read from the pre-built GeoJSON file.
world_geojson = 'https://raw.githubusercontent.com/armsp/covidviz/master/assets/world.geojson'
us_st_world = gpd.read_file(world_geojson)

# JHU CSSE global confirmed-cases time series.
uri = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
time_s_raw = pd.read_csv(uri)

# Sum every column from position 4 onward within each 'Country/Region',
# collapsing multiple rows for the same country into a single row, then
# turn the group key back into an ordinary column.
time_s = (
    time_s_raw
    .groupby('Country/Region')[list(time_s_raw.columns[4:])]
    .sum()
    .reset_index()
)
#time_s

Let's first find out what countries in our dataset are not present in the shapefile

# Rows of time_s whose 'Country/Region' has no exact match in the shapefile's COUNTRY_NA column.
time_s[~time_s['Country/Region'].isin(us_st_world['COUNTRY_NA'])]
Country/Region 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 1/28/20 1/29/20 1/30/20 ... 6/8/20 6/9/20 6/10/20 6/11/20 6/12/20 6/13/20 6/14/20 6/15/20 6/16/20 6/17/20
5 Antigua and Barbuda 0 0 0 0 0 0 0 0 0 ... 26 26 26 26 26 26 26 26 26 26
11 Bahamas 0 0 0 0 0 0 0 0 0 ... 103 103 103 103 103 103 103 103 104 104
21 Bosnia and Herzegovina 0 0 0 0 0 0 0 0 0 ... 2704 2728 2775 2832 2893 2893 2893 3040 3085 3141
33 Central African Republic 0 0 0 0 0 0 0 0 0 ... 1850 1850 1888 1952 2044 2057 2057 2222 2410 2564
39 Congo (Brazzaville) 0 0 0 0 0 0 0 0 0 ... 683 728 728 728 728 728 728 883 883 883
40 Congo (Kinshasa) 0 0 0 0 0 0 0 0 0 ... 4106 4259 4390 4515 4637 4724 4778 4837 4974 5100
48 Diamond Princess 0 0 0 0 0 0 0 0 0 ... 712 712 712 712 712 712 712 712 712 712
58 Eswatini 0 0 0 0 0 0 0 0 0 ... 340 371 398 449 472 486 490 506 520 563
64 Gambia 0 0 0 0 0 0 0 0 0 ... 28 28 28 28 28 28 28 30 34 34
75 Holy See 0 0 0 0 0 0 0 0 0 ... 12 12 12 12 12 12 12 12 12 12
104 MS Zaandam 0 0 0 0 0 0 0 0 0 ... 9 9 9 9 9 9 9 9 9 9
127 North Macedonia 0 0 0 0 0 0 0 0 0 ... 3152 3239 3364 3538 3701 3895 4057 4157 4299 4482
142 Saint Kitts and Nevis 0 0 0 0 0 0 0 0 0 ... 15 15 15 15 15 15 15 15 15 15
143 Saint Lucia 0 0 0 0 0 0 0 0 0 ... 19 19 19 19 19 19 19 19 19 19
144 Saint Vincent and the Grenadines 0 0 0 0 0 0 0 0 0 ... 27 27 27 27 27 27 27 27 29 29
146 Sao Tome and Principe 0 0 0 0 0 0 0 0 0 ... 513 514 611 632 639 659 661 662 671 683
165 Taiwan* 1 1 3 3 4 5 8 8 9 ... 443 443 443 443 443 443 443 445 445 445
171 Trinidad and Tobago 0 0 0 0 0 0 0 0 0 ... 117 117 117 117 117 117 123 123 123 123
174 US 1 1 2 2 5 5 5 5 5 ... 1961781 1979868 2000702 2023590 2048986 2074526 2094058 2114026 2137731 2163290
183 West Bank and Gaza 0 0 0 0 0 0 0 0 0 ... 473 481 485 487 489 489 492 505 514 555
184 Western Sahara 0 0 0 0 0 0 0 0 0 ... 9 9 9 9 9 9 9 9 9 9

21 rows × 149 columns

The name used for the same country can differ between the two datasets, so we need to unify the names before we can merge. To do that, let's look at each of the missing countries one by one, like so -

# Look up how the shapefile spells a country by matching on the start of its name.
us_st_world.loc[us_st_world['COUNTRY_NA'].str.startswith('Antigua')]
COUNTRY_NA geometry
8 Antigua & Barbuda MULTIPOLYGON (((-62.34839 16.93286, -62.35303 ...

Apply the same technique to all the countries and you'd end up with the following modifications -

# Rewrite the JHU country names (keys) to the spelling used in the shapefile's
# COUNTRY_NA column (values). Only exact, whole-value matches are replaced.
time_s['Country/Region'] = time_s['Country/Region'].replace({
    'Taiwan*': 'Taiwan',
    'US': 'United States',
    'Czech Republic': 'Czechia',
    'West Bank and Gaza': 'West Bank (disp)',
    'Western Sahara': 'Western Sahara (disp)',
    'Trinidad and Tobago': 'Trinidad & Tobago',
    'Sao Tome and Principe': 'Sao Tome & Principe',
    'Saint Vincent and the Grenadines': 'St Vincent & the Grenadines',
    'Saint Lucia': 'St Lucia',
    'Saint Kitts and Nevis': 'St Kitts & Nevis',
    'North Macedonia': 'Macedonia',
    'Bahamas': 'Bahamas, The',
    'Bosnia and Herzegovina': 'Bosnia & Herzegovina',
    'Central African Republic': 'Central African Rep',
    'Eswatini': 'Swaziland',
    # 'South Korea': 'Korea, South',  (left disabled, as in the original)
    'Congo (Kinshasa)': 'Congo, Dem Rep of the',
    'Congo (Brazzaville)': 'Congo, Rep of the',
    'Antigua and Barbuda': 'Antigua & Barbuda',
})

TODO

Can we do the above using code instead of manually?

We will ignore the following due to very few cases

# collapse
# After the renames: the rows that still have no matching COUNTRY_NA in the shapefile.
time_s[~time_s['Country/Region'].isin(us_st_world['COUNTRY_NA'])]

Country/Region 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 1/28/20 1/29/20 1/30/20 ... 6/8/20 6/9/20 6/10/20 6/11/20 6/12/20 6/13/20 6/14/20 6/15/20 6/16/20 6/17/20
48 Diamond Princess 0 0 0 0 0 0 0 0 0 ... 712 712 712 712 712 712 712 712 712 712
64 Gambia 0 0 0 0 0 0 0 0 0 ... 28 28 28 28 28 28 28 30 34 34
75 Holy See 0 0 0 0 0 0 0 0 0 ... 12 12 12 12 12 12 12 12 12 12
104 MS Zaandam 0 0 0 0 0 0 0 0 0 ... 9 9 9 9 9 9 9 9 9 9

4 rows × 149 columns

Finding cases per day -

# Transpose so that each column is a country and each row is a date column of time_s,
# then take row-to-row differences down every column (the first row becomes NaN).
time_s_T = time_s.set_index('Country/Region').T
time_s_T = time_s_T.diff(axis=0)

Averaging the cases over a week -

# hide_output
# Collects one (name, now, ago) tuple per call to roll_case_avg.
roll_case_avg_list = []


def roll_case_avg(row):
    """Append floored 7-entry averages for ``row`` to ``roll_case_avg_list``.

    The series is reversed so that its last entry comes first, a 7-entry
    rolling mean is taken and floored, and the result is shifted so that
    position ``i`` holds the mean of reversed positions ``i`` to ``i + 6``.
    Two values are recorded with ``row.name``:

    * position 0  - mean of the last 7 entries of ``row``;
    * position 14 - mean of the 7 entries at offsets 14 to 20 from the end.

    Nothing is returned; the tuple is appended to the module-level list.
    Raises ``IndexError`` if ``row`` has fewer than 15 entries.
    """
    newest_first = row[::-1]
    weekly_mean = newest_first.rolling(window=7).mean()
    # Move each window's mean from the window's last position to its first.
    aligned = np.floor(weekly_mean).shift(-6)
    latest = aligned.iloc[0]
    earlier = aligned.iloc[14]
    roll_case_avg_list.append((row.name, latest, earlier))

# Transpose back so that every row is one country, then run roll_case_avg on
# each row; the results accumulate in roll_case_avg_list.
p = time_s_T.transpose()
p.apply(roll_case_avg, axis='columns')
#roll_case_avg_list

I asked the NYT GitHub Team how they establish the category colors, and based on their input we will use the following classification -

The thresholds for that change are:

  • Blue: < -15%
  • Yellow: > -15% and < +15%
  • Light orange: >+15% and <+100%
  • Mid orange: >+100% and <+200%
  • Dark red: >+200%
def categorize(x):
    """Return the trend category for one row of averaged case counts.

    Parameters
    ----------
    x : mapping or pandas row with the keys ``'now'``, ``'ago'`` and ``'diff'``
        ``'diff'`` is expected to be ``now - ago``.

    Returns
    -------
    str or None
        ``'Few or no cases'`` when either ``now`` or ``ago`` is 0 (this also
        guards the division below). Otherwise the label for the percentage
        change ``100 * diff / ago``:

        * below -15           -> ``'Declining'``
        * -15 to 15 inclusive -> ``'About the same'``
        * above 15, up to 100 -> ``'Growth upto 2x'``
        * above 100, up to 200 -> ``'Growth upto 3x'``
        * above 200           -> ``'Growth more than 3x'``

        ``None`` when the change is NaN (missing data).

    A change that lands exactly on a threshold (-15, 15, 100 or 200) always
    gets a category. The upper bounds are inclusive so that exactly 2x falls
    under "upto 2x" and exactly 3x under "upto 3x".
    """
    if x['now'] == 0 or x['ago'] == 0:  # x['ago'] <= 5 was considered here and left disabled
        return 'Few or no cases'
    # Multiply before dividing so integer-valued inputs hit the thresholds exactly.
    delta = 100 * x['diff'] / x['ago']
    if delta < -15:
        return 'Declining'
    if delta <= 15:
        return 'About the same'
    if delta <= 100:
        return 'Growth upto 2x'
    if delta <= 200:
        return 'Growth upto 3x'
    if delta > 200:
        return 'Growth more than 3x'
    # Only reached when delta is NaN: every comparison above is False.
    return None
# One row per entry of roll_case_avg_list: the name plus the two floored averages.
test2 = pd.DataFrame(data=roll_case_avg_list, columns=['country', 'now', 'ago'])
# Absolute change between the two averages, then the trend label for each row.
test2['diff'] = test2['now'].sub(test2['ago'])
test2['category'] = test2.apply(categorize, axis='columns')
# Number of rows in each category.
test2.groupby('category').count()
country now ago diff
category
About the same 23 23 23 23
Declining 44 44 44 44
Few or no cases 42 42 42 42
Growth more than 3x 14 14 14 14
Growth upto 2x 44 44 44 44
Growth upto 3x 15 15 15 15
# Give the name column the same label as the shapefile so the two can be joined on it.
test2.rename(columns={'country': 'COUNTRY_NA'}, inplace=True)
# Left join: every shape is kept; shapes without a matching row get NaN in the data columns.
plot2 = us_st_world.merge(
    test2,
    on='COUNTRY_NA',
    how='left',
)
plot2
COUNTRY_NA geometry now ago diff category
0 Abyei (disp) POLYGON ((29.00000 9.67356, 28.78724 9.49406, ... NaN NaN NaN NaN
1 Afghanistan POLYGON ((70.98955 38.49070, 71.37353 38.25597... 676.0 687.0 -11.0 About the same
2 Akrotiri (UK) POLYGON ((32.83539 34.70576, 32.98961 34.67999... NaN NaN NaN NaN
3 Aksai Chin (disp) MULTIPOLYGON (((78.69853 34.09310, 78.69837 34... NaN NaN NaN NaN
4 Albania POLYGON ((19.72764 42.66045, 19.79268 42.48135... 54.0 19.0 35.0 Growth upto 3x
... ... ... ... ... ... ...
274 Burma MULTIPOLYGON (((98.03206 9.83411, 98.06033 9.8... 2.0 3.0 -1.0 Declining
275 India MULTIPOLYGON (((93.84583 7.24456, 93.96289 7.0... 11477.0 8391.0 3086.0 Growth upto 2x
276 Benin POLYGON ((2.84088 12.40599, 3.26927 12.01606, ... 38.0 4.0 34.0 Growth more than 3x
277 Niger POLYGON ((12.02686 23.50849, 13.52600 23.15616... 6.0 1.0 5.0 Growth more than 3x
278 Nigeria MULTIPOLYGON (((6.13707 4.37177, 6.08799 4.359... 551.0 347.0 204.0 Growth upto 2x

279 rows × 6 columns

Now we are ready to plot the choropleth -

# collapse
# World map with one shape per row of plot2 (Antarctica filtered out), filled
# by trend category using a fixed label-to-colour mapping.
base = (
    alt.Chart(plot2)
    .mark_geoshape(stroke='white')
    .transform_filter(alt.datum.COUNTRY_NA != 'Antarctica')
    .encode(
        color=alt.Color(
            'category:N',
            scale=alt.Scale(
                domain=['Few or no cases', 'Declining', 'About the same', 'Growth upto 2x', 'Growth upto 3x', 'Growth more than 3x'],
                range=['#f2f2f2', '#badee8', '#f2df91', '#ffae43', '#ff6e0b', '#ce0a05'],
            ),
            legend=alt.Legend(
                title=None,
                orient='top',
                labelBaseline='middle',
                symbolType='square',
                columnPadding=20,
                labelFontSize=15,
                gridAlign='each',
                symbolSize=200,
            ),
        ),
        tooltip=[
            'COUNTRY_NA',
            alt.Tooltip('now:Q', format='.0d'),
            alt.Tooltip('ago:Q', format='.0d'),
            'category',
        ],
    )
    .properties(height=800, width=1500)
    .project('equalEarth')
    .configure_view(strokeWidth=0)
)

base

We can do something even more interesting... we can make the chart interactive by highlighting the countries based on their category - Declining, About the same, Growth upto 2x, Growth upto 3x and Growth more than 3x.

#collapse
# Single-value selection on the 'category' field, bound to the chart legend.
# With empty='all', nothing selected counts as everything selected.
# NOTE(review): selection_single / add_selection look deprecated in newer Altair releases - confirm before upgrading.
selector = alt.selection_single(fields=['category'], empty='all', bind='legend')

# Same map as `base`, with the colour encoding overridden (the legend omits
# 'Few or no cases') and an opacity encoding: full opacity where the selection
# matches, 0.25 elsewhere.
interactive = base.encode(
    color=alt.Color(
        'category:N',
        scale=alt.Scale(
            domain=['Few or no cases', 'Declining', 'About the same', 'Growth upto 2x', 'Growth upto 3x', 'Growth more than 3x'],
            range=['#f2f2f2', '#badee8', '#f2df91', '#ffae43', '#ff6e0b', '#ce0a05'],
        ),
        legend=alt.Legend(
            values=['Declining', 'About the same', 'Growth upto 2x', 'Growth upto 3x', 'Growth more than 3x'],
            title=None,
            orient='top',
            labelBaseline='middle',
            symbolType='square',
            columnPadding=20,
            labelFontSize=15,
            gridAlign='each',
            symbolSize=200,
        ),
    ),
    opacity=alt.condition(selector, alt.value(1), alt.value(0.25)),
).add_selection(selector)

Now click on the legend to highlight the countries for that category.

# Display the interactive chart as the cell output.
interactive